#encoding=utf-8
# Python 2 script: reload(sys) restores the setdefaultencoding attribute
# (removed by site.py) so we can force UTF-8 as the default str<->unicode
# codec; this keeps non-ASCII text from raising UnicodeDecodeError below.
from pyspark import SparkConf, SparkContext
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
conf = SparkConf().setMaster("local").setAppName("My App") # Spark application configuration: local (single-JVM) master
sc = SparkContext(conf = conf) # driver-side entry point to Spark, shared by analysis() below

def analysis(input, output):
    """Word-count the space-separated tokens of the file at *input* and
    write "word = count" lines to *output*, most frequent first.

    input  -- path of the text file to read (note: name shadows the
              builtin ``input``; kept for caller compatibility)
    output -- path of the result file to create/overwrite

    Relies on the module-level SparkContext ``sc``.
    """
    rdd = sc.textFile(input)  # RDD of text lines
    words = rdd.flatMap(lambda line: line.split(" "))
    # Classic map/reduce word count: (word, 1) pairs summed per key.
    counts = words.map(lambda word: (word, 1)).reduceByKey(lambda a, b: a + b)
    results = counts.collect()  # materialize on the driver so we can sort locally
    # Sort by count, descending (lambda no longer shadows `results`).
    sortCount = sorted(results, key=lambda pair: pair[1], reverse=True)
    # `with` guarantees the handle is closed even if a write raises;
    # the original open()/close() pair leaked the file on error.
    with open(output, "w") as writer:
        for (word, count) in sortCount:
            res = "%s = %s\n" % (word, count)
            writer.write(res.encode("utf-8"))  # py2: write bytes explicitly

# Driver entry point: run the word-count analysis on each input corpus,
# producing one sorted "word = count" result file per input.
analysis("total_comment.txt", "result_total.txt")
analysis("total_sense_com.txt", "result_total_sense.txt")
analysis("total_country.txt", "result_total_country.txt")


sc.stop()  # release the SparkContext / cluster resources